Import relevant files

Summary of School Utilization

# Interactive bar chart of mean total utilization per school board (DSB).
# Schools are joined to the board definitions, averaged within each
# (dsb.index, panel) group, and the bars are coloured by board type.
g <- ggplotly(
  school_sfis_2017 %>%
    left_join(school_board_def, by = c("dsb.index" = "dsb")) %>%
    group_by(dsb.index, panel) %>%
    summarise(
      utilization.total = mean(utilization.total),
      # Take the first board type name seen within each group.
      board_type_name = first(board_type_name)
    ) %>%
    ggplot(
      aes(x = dsb.index, y = utilization.total, fill = board_type_name)
    ) +
    geom_col(position = "dodge") +
    labs(x = "DSB", y = "Average Utilization", fill = "Board Type") +
    theme_minimal()
)

# Print the interactive widget.
g

Find Closest School for each Student

## Warning: funs() is soft deprecated as of dplyr 0.8.0
## please use list() instead
## 
##   # Before:
##   funs(name = f(.))
## 
##   # After: 
##   list(name = ~ f(.))
## This warning is displayed once per session.
## Regions defined for each Polygons

Spatial Gymnastics

# Catchment percentiles to process. Each value must have a matching
# `student_travel_<dist>` object already defined in the session, because the
# loop below looks that object up by name.
# percentile_dist <- c("90")
percentile_dist <- c("90", "80", "70", "60", "50")

for (dist in percentile_dist) {

  # Loop through the different catchment areas by retrieving the proper
  # variable (e.g. `student_travel_90`) by name.
  student_travel <- get(paste0("student_travel_", dist))

  # Spatial transformation to XY
  student_xy <- create_student_xy(student_travel)
  school_xy <- create_school_xy(student_travel)

  # Create the overlays to get the TRESO zone each XY coordinate falls on
  student_overlay <- create_overlay(student_xy, treso_shp, "student")
  saveRDS(student_overlay, paste0("output/student_overlay_", dist, ".rds"))
  school_overlay <- create_overlay(school_xy, treso_shp, "school")

  # Join the POR and POS together for distance matrix.
  # Assign first, then save: saveRDS() returns NULL invisibly, so piping the
  # join straight into saveRDS() would leave `observed_por_pos` set to NULL.
  observed_por_pos <- left_join(
    student_overlay,
    school_overlay,
    by = "school.name"
  )
  saveRDS(observed_por_pos, paste0("output/observed_por_pos_", dist, ".rds"))

  # Save the zones in each school's catchment area
  buffered_df <- buffer_zones(school_xy, treso_shp)
  saveRDS(
    buffered_df,
    paste0("output/school_catchment_treso_zones_", dist, ".rds")
  )

  # Combine the school's buffered TRESO zones with socio-economic info and
  # group_by school name. As above, keep the result in `school_tb` rather
  # than overwriting it with the NULL returned by saveRDS().
  school_tb <- summarize_buffered_zones(
    buffered_df,
    treso_tb,
    school_sfis_2017,
    school_board_def,
    treso_zone_def
  )
  saveRDS(school_tb, paste0("output/school_tb_", dist, ".rds"))
}

Closest School Theory

## Saving 10 x 12 in image

Straight Line TLFD plots

90th Percentile plots

## Warning: Column `student.postal.code` joining character vector and factor,
## coercing into character vector

80th Percentile plots

## Warning: Column `student.postal.code` joining character vector and factor,
## coercing into character vector

70th Percentile plots

## Warning: Column `student.postal.code` joining character vector and factor,
## coercing into character vector

50th Percentile plots

## Warning: Column `student.postal.code` joining character vector and factor,
## coercing into character vector

Explore Linear Regressions

# Pick the catchment area to explore (here the 90th-percentile table) and
# derive population densities from it.
school_tb <- mutate(
  readRDS("output/school_tb_90.rds"),
  # Rescale the summed zone area by 1e6 before it is used as a denominator
  # (presumably m^2 -> km^2; TODO confirm the source units).
  shape.area_sum = shape.area_sum / 1e6,
  # The densities below use the rescaled area computed just above.
  n_pop_density = n_pop_sum / shape.area_sum,
  n_sec_pop_density = n_sec_pop_sum / shape.area_sum,
  n_ele_pop_density = n_ele_pop_sum / shape.area_sum
)

# Five-number summary (plus mean) of the `ade` column.
summary(school_tb$ade)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    50.0   229.0   356.0   427.2   541.5  1975.0
# Exploratory plots for secondary-panel schools. Every plot is faceted by
# MOF region (rows) and board type (columns) with free scales.

# g1: distribution of ADE.
g1 <- school_tb %>%
  filter(panel == "Secondary") %>%
  ggplot(aes(x = ade)) +
  geom_histogram(bins = 50) +
  facet_grid(
    rows = vars(mof.region),
    cols = vars(board.type.name),
    scales = "free"
  ) +
  labs(x = "ADE", y = "Count") +
  theme_minimal()

# g2: ADE against catchment distance.
g2 <- school_tb %>%
  filter(panel == "Secondary") %>%
  ggplot(aes(x = catchment.dist, y = ade, label = school.name.x)) +
  geom_point(alpha = 0.5) +
  facet_grid(
    rows = vars(mof.region),
    cols = vars(board.type.name),
    scales = "free"
  ) +
  labs(x = "Catchment Distance", y = "ADE") +
  theme_minimal()

# g3: ADE against secondary population density, GTA region only.
g3 <- school_tb %>%
  filter(panel == "Secondary", mof.region == "GTA") %>%
  ggplot(aes(x = n_sec_pop_density, y = ade, label = school.name.x)) +
  geom_point(alpha = 0.5) +
  facet_grid(
    rows = vars(mof.region),
    cols = vars(board.type.name),
    scales = "free"
  ) +
  labs(x = "Secondary Population Density", y = "ADE") +
  theme_minimal()

# g4: ADE against the summed school-attendance count.
g4 <- school_tb %>%
  filter(panel == "Secondary") %>%
  ggplot(aes(x = attend_school_sum, y = ade, label = school.name.x)) +
  geom_point(alpha = 0.5) +
  facet_grid(
    rows = vars(mof.region),
    cols = vars(board.type.name),
    scales = "free"
  ) +
  labs(x = "Attend School", y = "ADE") +
  theme_minimal()

# Render each plot as an interactive plotly widget.
ggplotly(g1)
ggplotly(g2)
ggplotly(g3)
ggplotly(g4)

# List any schools whose MOF region is the literal string "0".
school_tb %>% filter(mof.region == "0")
## # A tibble: 0 x 80
## # ... with 80 variables: sfis <int>, n_pop_sum <dbl>, n_hhlds_sum <dbl>,
## #   n_ft_sum <dbl>, n_pt_sum <dbl>, n_unemp_sum <dbl>,
## #   n_sec_pop_sum <dbl>, n_ele_pop_sum <dbl>, n_pre_pop_sum <dbl>,
## #   n_zero_adult_sum <dbl>, n_one_adult_zero_child_sum <dbl>,
## #   n_one_adult_one_child_sum <dbl>, n_one_adult_two_child_sum <dbl>,
## #   n_one_adult_twoplus_child_sum <dbl>, n_two_adult_zero_child_sum <dbl>,
## #   n_two_adult_one_child_sum <dbl>, n_two_adult_two_child_sum <dbl>,
## #   n_two_adult_twoplus_child_sum <dbl>,
## #   n_twoplus_adult_zero_child_sum <dbl>,
## #   n_twoplus_adult_one_child_sum <dbl>,
## #   n_twoplus_adult_two_child_sum <dbl>,
## #   n_twoplus_adult_twoplus_child_sum <dbl>, occu_management_sum <dbl>,
## #   occu_business_sum <dbl>, occu_science_sum <dbl>,
## #   occu_health_sum <dbl>, occu_public_sum <dbl>,
## #   occu_recreation_sum <dbl>, occu_sales_sum <dbl>,
## #   occu_trades_sum <dbl>, occu_production_sum <dbl>,
## #   occu_manufacturing_sum <dbl>, occu_notapplicable_sum <dbl>,
## #   deg_none_sum <dbl>, deg_hs_sum <dbl>, deg_trades_sum <dbl>,
## #   deg_ra_sum <dbl>, deg_col_sum <dbl>, deg_uni_sum <dbl>,
## #   deg_ugrad_sum <dbl>, deg_grad_sum <dbl>, deg_na_sum <dbl>,
## #   attend_school_sum <dbl>, shape.area_sum <dbl>, mean.income <dbl>,
## #   mean.age <dbl>, dsb.index.x <dbl>, school.name.x <chr>,
## #   catchment.dist <dbl>, board.type.name <chr>, area <chr>,
## #   mof.region <chr>, year <int>, dsb.index.y <int>, board.name <chr>,
## #   panel <chr>, school.name.y <chr>, ade <dbl>, otg <dbl>,
## #   school.lat <dbl>, school.long <dbl>, status <chr>, dataset.id <int>,
## #   utilization <dbl>, year.y <int>, bsid <int>, ade.forecast <dbl>,
## #   ade.sec.forecast <dbl>, ade.elem.forecast <dbl>,
## #   ade.jksk.forecast <dbl>, ade.g1g3.forecast <dbl>,
## #   ade.g4g8.forecast <dbl>, area.m2 <dbl>, num.units.calculated <dbl>,
## #   capacity.portable <dbl>, capacity.total <dbl>,
## #   utilization.total <dbl>, n_pop_density <dbl>, n_sec_pop_density <dbl>,
## #   n_ele_pop_density <dbl>